3. Train-Predict-XGBoost

In [46]:
import time
import os
import pandas as pd

project_name = 'Dog_Breed_Identification'
step_name = 'Train-Predict-XGBoost'
time_str = time.strftime("%Y%m%d_%H%M%S", time.localtime())
run_name = project_name + '_' + step_name + '_' + time_str
print('run_name: ' + run_name)

cwd = os.getcwd()
log_path = os.path.join(cwd, 'log')
model_path = os.path.join(cwd, 'model')
output_path = os.path.join(cwd, 'output')
print('log_path: \t' + log_path)
print('model_path: \t' + model_path)
print('output_path: \t' + output_path)


run_name: Dog_Breed_Identification_Train-Predict-XGBoost_20171101_221638
log_path: 	E:\Udacity\MachineLearning(Advanced)\p6_graduation_project\log
model_path: 	E:\Udacity\MachineLearning(Advanced)\p6_graduation_project\model
output_path: 	E:\Udacity\MachineLearning(Advanced)\p6_graduation_project\output

In [27]:
df = pd.read_csv(os.path.join(cwd, 'input', 'labels.csv'))
print('labels amount: %d' % len(df))
df.head()


labels amount: 10222
Out[27]:
                                 id             breed
0  000bec180eb18c7604dcecc8fe0dba07       boston_bull
1  001513dfcb2ffafc82cccf4d8bbaba97             dingo
2  001cdf01b096e06d78e9e5112d419397          pekinese
3  00214f311d5d2247d5dfe4fe24b2303d          bluetick
4  0021f9ceb3235effd7fcde7f7538ed62  golden_retriever

In [28]:
import h5py
import numpy as np
from sklearn.utils import shuffle
np.random.seed(2017)

x_train = []
x_test = []

cwd = os.getcwd()
feature_vgg16 = os.path.join(cwd, 'model', 'feature_VGG16_{}.h5'.format(20171026))
feature_vgg19 = os.path.join(cwd, 'model', 'feature_VGG19_{}.h5'.format(20171026))
feature_resnet50 = os.path.join(cwd, 'model', 'feature_ResNet50_{}.h5'.format(20171026))
feature_xception = os.path.join(cwd, 'model', 'feature_Xception_{}.h5'.format(20171026))
feature_inception = os.path.join(cwd, 'model', 'feature_InceptionV3_{}.h5'.format(20171026))
# feature_inceptionResNetV2 = os.path.join(cwd, 'model', 'feature_InceptionResNetV2_{}.h5'.format(20171028))
for filename in [feature_vgg16, feature_vgg19, feature_resnet50, feature_xception, feature_inception]:
    with h5py.File(filename, 'r') as h:
        x_train.append(np.array(h['train']))
        # every feature file stores the same label vector, so overwriting is safe
        y_train = np.array(h['train_labels'])
        x_test.append(np.array(h['test']))

# concatenate the per-network features along the feature axis
x_train = np.concatenate(x_train, axis=-1)
x_test = np.concatenate(x_test, axis=-1)
print(x_train.shape)
print(x_train.shape[1:])

print(len(y_train))
print(x_test.shape)


(10222, 7168)
(7168,)
10222
(10357, 7168)
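
The 7168-wide feature vector is the concatenation of the five bottleneck outputs (512 + 512 + 2048 + 2048 + 2048 for VGG16, VGG19, ResNet50, Xception and InceptionV3 with global average pooling). A minimal sketch of how one such feature file could have been produced in the earlier extraction step; the image arrays x_train_images, y_labels and x_test_images are assumed from that step and are not defined in this notebook:

In [ ]:
# Hedged sketch, not the original extraction code.
# import h5py
# from keras.applications.resnet50 import ResNet50
#
# base = ResNet50(weights='imagenet', include_top=False, pooling='avg')
# train_feat = base.predict(x_train_images, batch_size=32)  # -> (n_train, 2048)
# test_feat = base.predict(x_test_images, batch_size=32)    # -> (n_test, 2048)
# with h5py.File('feature_ResNet50_20171026.h5', 'w') as h:
#     h.create_dataset('train', data=train_feat)
#     h.create_dataset('train_labels', data=y_labels)
#     h.create_dataset('test', data=test_feat)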

In [29]:
from sklearn.utils import shuffle
(x_train, y_train) = shuffle(x_train, y_train)

In [30]:
from sklearn.model_selection import train_test_split
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.05, random_state=2017)
print(x_train.shape)
print(y_train.shape)
print(x_val.shape)
print(y_val.shape)


(9710, 7168)
(9710,)
(512, 7168)
(512,)

In [31]:
from keras.utils.np_utils import to_categorical

# XGBoost's multi-class objectives expect integer class labels rather than
# one-hot vectors, so these conversions stay commented out
# y_train = to_categorical(y_train)
# y_val = to_categorical(y_val)
print(y_train.shape)
print(y_val.shape)


(9710,)
(512,)

Build Model


In [32]:
import xgboost as xgb
from sklearn.metrics import accuracy_score

In [43]:
%%time
xg_train = xgb.DMatrix(x_train, label=y_train)
xg_val = xgb.DMatrix(x_val, label=y_val)
xg_test = xgb.DMatrix(x_test)
# set up parameters for xgboost
param = {}
# use softmax multi-class classification (outputs hard class labels)
param['objective'] = 'multi:softmax'
# learning rate (shrinkage)
param['eta'] = 0.1
# maximum tree depth; 50 is far deeper than the usual 3-10 range,
# which is largely why each round below takes minutes
param['max_depth'] = 50
param['silent'] = 1
param['nthread'] = 4
param['num_class'] = 120

watchlist = [(xg_train, 'train'), (xg_val, 'val')]
num_round = 5
bst = xgb.train(param, xg_train, num_round, watchlist)


[0]	train-merror:0.252935	val-merror:0.361328
[1]	train-merror:0.14861	val-merror:0.298828
[2]	train-merror:0.106076	val-merror:0.257812
[3]	train-merror:0.075695	val-merror:0.242188
[4]	train-merror:0.054892	val-merror:0.226562
Wall time: 14min 47s
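
With eta at 0.1 and only five rounds, the validation error is still falling at the last round. A hedged variant lets xgboost pick the round count via early stopping; num_boost_round and early_stopping_rounds here are illustrative, not tuned, and a run this long would take hours at this tree depth:

In [ ]:
# Hedged sketch: early stopping on the validation split.
# bst_es = xgb.train(param, xg_train, num_boost_round=200,
#                    evals=watchlist, early_stopping_rounds=10)
# print(bst_es.best_iteration, bst_es.best_score)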

In [44]:
model_name = run_name + '.bin'
bst.save_model(model_name)

In [45]:
bst0 = xgb.Booster({'nthread': 4})  # init model
bst0.load_model(model_name)  # load the saved model

In [49]:
y_pred = bst0.predict(xg_val)
print(y_pred.shape)
print(y_pred[0:5])


(512,)
[  0.   5.  14.  94.   2.]
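
multi:softmax returns hard class indices (as floats), so validation accuracy follows directly with the accuracy_score imported above; this should agree with 1 minus the final val-merror:

In [ ]:
val_acc = accuracy_score(y_val, y_pred.astype(int))
print('val accuracy: %.4f' % val_acc)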

In [50]:
# train again with the same parameters, but output per-class probabilities
param['objective'] = 'multi:softprob'
bst1 = xgb.train(param, xg_train, num_round, watchlist)
# Note: older xgboost builds returned softprob predictions as a flat 1D array
# that had to be reshaped to (ndata, nclass); this build already returns a 2D
# array, as the (512, 120) shape below shows.


[0]	train-merror:0.252935	val-merror:0.361328
[1]	train-merror:0.14861	val-merror:0.298828
[2]	train-merror:0.106076	val-merror:0.257812
[3]	train-merror:0.075695	val-merror:0.242188
[4]	train-merror:0.054892	val-merror:0.226562
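
Since the softprob predictions come back as a 2D array here, the per-class probabilities can be collapsed with argmax for a quick error check:

In [ ]:
pred_prob = bst1.predict(xg_val)           # shape (n_val, 120)
pred_label = np.argmax(pred_prob, axis=1)  # hard class per row
error_rate = np.mean(pred_label != y_val)
print('val error using softprob = {:.4f}'.format(error_rate))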

In [52]:
model_name = run_name + '_prob.bin'
bst1.save_model(model_name)

In [53]:
bst0 = xgb.Booster({'nthread': 4})  # init model
bst0.load_model(model_name)  # load the saved softprob model

In [51]:
y_pred = bst1.predict(xg_val)
print(y_pred.shape)
print(y_pred[0:5])


(512, 120)
[[ 0.2974295   0.00490525  0.00490315  0.00490537  0.00500645  0.00490574
   0.0049357   0.00490251  0.00490329  0.00490563  0.00490444  0.00490323
   0.00494976  0.00490511  0.00490492  0.00490526  0.0049048   0.00496223
   0.00490462  0.00490526  0.00490454  0.00490512  0.00577949  0.00490558
   0.00490477  0.00490458  0.0071401   0.00490558  0.00490444  0.00490545
   0.00502031  0.00490284  0.0049055   0.00490569  0.00490459  0.00563949
   0.00490418  0.00490501  0.00555472  0.00490506  0.00490551  0.00500069
   0.00490202  0.00490547  0.00490484  0.00490522  0.00490505  0.00528597
   0.00490526  0.00490467  0.00490415  0.00492774  0.00490459  0.00490551
   0.00662434  0.00490357  0.00490445  0.00490508  0.00589457  0.00490437
   0.00490517  0.00490458  0.00490286  0.00490502  0.04553157  0.00490419
   0.00490526  0.00490534  0.00490429  0.00490533  0.0049056   0.00490531
   0.00490403  0.0049024   0.00490438  0.00490484  0.00490572  0.00621409
   0.06089636  0.00490542  0.00490123  0.00490466  0.00490556  0.00490487
   0.00490427  0.0049045   0.0049055   0.00490346  0.00490342  0.00490521
   0.00490474  0.00490423  0.00490513  0.00490549  0.00528533  0.00490312
   0.01256359  0.00490426  0.00490406  0.00490497  0.00569648  0.00490533
   0.00490544  0.0049052   0.00490549  0.00496761  0.00494197  0.00490408
   0.00490494  0.00490511  0.00711655  0.00567312  0.00490527  0.00490543
   0.00490357  0.00528359  0.00536904  0.0049822   0.00490476  0.00500313]
 [ 0.00664251  0.00664275  0.0066399   0.00664292  0.00715593  0.13026524
   0.00664294  0.00663904  0.00664009  0.00664326  0.00664165  0.00975148
   0.00681564  0.00664256  0.007697    0.00664276  0.00664214  0.01204315
   0.00664189  0.00680158  0.00664179  0.00664257  0.00664207  0.00664319
   0.0066421   0.00664184  0.00664245  0.00933286  0.00664166  0.00854256
   0.00663859  0.00663948  0.00664309  0.02280047  0.00664186  0.00673126
   0.0066413   0.00664242  0.00735943  0.00664249  0.00664311  0.00664071
   0.0107274   0.00664305  0.0066422   0.0066427   0.01167289  0.00664258
   0.00664276  0.00751382  0.00664126  0.00664255  0.00664185  0.00727148
   0.00664281  0.00664047  0.00664166  0.00664252  0.00663977  0.00664155
   0.00664264  0.00664184  0.00663951  0.00963491  0.00664108  0.00664132
   0.00664277  0.00703968  0.00670006  0.00667426  0.00664322  0.00664283
   0.0066411   0.00663888  0.00664157  0.02073119  0.00664338  0.00667883
   0.01736585  0.00715883  0.00663731  0.00664195  0.00664317  0.00664223
   0.00675325  0.00664173  0.00664308  0.00664033  0.00664027  0.0066427
   0.00670577  0.00664136  0.00664258  0.00769393  0.00664221  0.00663986
   0.00664279  0.00664141  0.00664113  0.00664237  0.00664212  0.00664286
   0.00679436  0.00664268  0.00664307  0.00664133  0.00664307  0.00664115
   0.01184149  0.00664256  0.0066424   0.00664157  0.00664277  0.00664299
   0.00787519  0.00664094  0.00664299  0.00664273  0.00668845  0.00669705]
 [ 0.0051162   0.00511637  0.00511418  0.0051165   0.00511602  0.00511689
   0.00511652  0.00511352  0.05320547  0.00511677  0.00511553  0.00511426
   0.02889798  0.00511623  0.30715349  0.00511638  0.00511591  0.00511535
   0.00511571  0.00511639  0.00511564  0.00511624  0.00511585  0.00511672
   0.00511587  0.005234    0.00511615  0.00511672  0.00511553  0.00511659
   0.00511317  0.00741165  0.00511664  0.00511683  0.00511569  0.00511491
   0.00511526  0.00511613  0.00517258  0.00511617  0.00511665  0.00556918
   0.00511301  0.00511661  0.00511595  0.00511634  0.00640045  0.00511625
   0.00511638  0.00511577  0.00511523  0.00511622  0.00511569  0.00511665
   0.00511643  0.00511462  0.00511554  0.0051162   0.00511408  0.00511545
   0.00511629  0.00511567  0.00511388  0.00511613  0.00511509  0.00511527
   0.00511639  0.00511647  0.00780102  0.00511646  0.00527611  0.00511644
   0.00578488  0.0051134   0.00511547  0.00511595  0.00511686  0.00511676
   0.00511666  0.00511656  0.00511218  0.00511576  0.0051167   0.00533481
   0.00511535  0.00511559  0.00511663  0.00511451  0.00511447  0.00525322
   0.00511584  0.00511531  0.00511625  0.00627326  0.00511596  0.00511415
   0.0051164   0.00511535  0.00511513  0.00511609  0.00522371  0.00511646
   0.00511657  0.00511632  0.00551276  0.00511529  0.00571635  0.00679448
   0.00521185  0.00519542  0.00511611  0.00511547  0.00511639  0.00511656
   0.00511463  0.00511498  0.00511656  0.00511636  0.00511586  0.00511636]
 [ 0.00502803  0.00499871  0.00499657  0.00499884  0.00499836  0.00499921
   0.00499885  0.00499592  0.00499671  0.0049991   0.00499788  0.00499665
   0.00499837  0.00499857  0.00499838  0.00499872  0.00504145  0.00499771
   0.00499807  0.00499872  0.00499799  0.00499857  0.0049982   0.00499904
   0.00499822  0.00499803  0.00499849  0.00508885  0.00499789  0.00499892
   0.02519783  0.00499625  0.00499897  0.0052715   0.00499804  0.00499727
   0.00499762  0.00499847  0.00499857  0.00538681  0.00499898  0.00499717
   0.00499542  0.00499894  0.0049983   0.00499868  0.00499851  0.00499859
   0.00499872  0.00499812  0.00499759  0.01606635  0.00499804  0.00499898
   0.00499876  0.00545304  0.00499789  0.00565864  0.00499647  0.00499781
   0.00499863  0.00499803  0.00672605  0.00499847  0.00499746  0.00499763
   0.00499872  0.00499881  0.00504184  0.00499879  0.00499907  0.00499878
   0.00499747  0.00506755  0.00499782  0.00510018  0.0050289   0.00499909
   0.00499899  0.00499889  0.00499462  0.00499811  0.00499903  0.00499832
   0.00514893  0.00499795  0.00499896  0.00499689  0.00499685  0.00499867
   0.00499819  0.00499767  0.00499859  0.00499896  0.36800078  0.00614943
   0.00499874  0.0049977   0.00499749  0.00499843  0.00499824  0.00499879
   0.0049989   0.00499866  0.00499896  0.00566063  0.00499896  0.00499751
   0.00506728  0.00499857  0.00499845  0.00499782  0.00499873  0.00499889
   0.004997    0.00499735  0.00499889  0.0049987   0.00499821  0.00499869]
 [ 0.00524388  0.00524406  0.35276937  0.0052442   0.0052437   0.00524459
   0.00543621  0.00524114  0.00524197  0.00524447  0.0052432   0.0052419
   0.00524371  0.00524391  0.00524371  0.00524407  0.00524358  0.00524302
   0.00565103  0.00544742  0.00524331  0.00524392  0.00524353  0.00524441
   0.00524355  0.00524335  0.00532671  0.00524442  0.0052432   0.00524428
   0.00524078  0.00524149  0.00524433  0.00524453  0.00524336  0.00531893
   0.00529505  0.0062623   0.00524392  0.00524386  0.00524435  0.00524245
   0.00524061  0.0052443   0.00524363  0.01015866  0.00524385  0.00524394
   0.00524407  0.00524345  0.00529815  0.0052681   0.0056493   0.00524435
   0.00524412  0.00524227  0.00571694  0.00524388  0.0056489   0.00524312
   0.0145527   0.00530208  0.00524151  0.00524382  0.00524275  0.00524293
   0.00524408  0.00524417  0.00524303  0.00524415  0.00524444  0.00524413
   0.00524276  0.00524101  0.00524313  0.00524363  0.00524456  0.00524446
   0.00524435  0.00524425  0.00523977  0.00524344  0.0052444   0.00643719
   0.00524301  0.00524326  0.00533296  0.00524216  0.00524211  0.00524402
   0.00593787  0.00761592  0.00524393  0.00593885  0.00524364  0.00524179
   0.00524409  0.00524301  0.00524279  0.00524377  0.00524357  0.00524415
   0.00524427  0.00524401  0.00524432  0.00524295  0.00537048  0.00524281
   0.00524373  0.00524392  0.00524379  0.00524313  0.00524408  0.00524426
   0.00524227  0.00524264  0.00524426  0.00524405  0.00524354  0.00565101]]
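
Kaggle scores this competition with multiclass log loss, so it is worth checking that metric on the held-out split as well; labels is passed explicitly since 512 validation samples may not cover all 120 classes:

In [ ]:
from sklearn.metrics import log_loss
print('val log loss: %.4f' % log_loss(y_val, y_pred, labels=list(range(120))))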

In [ ]:
# derive final_acc from the validation split so the run name encodes it
final_acc = accuracy_score(y_val, np.argmax(bst1.predict(xg_val), axis=1))
run_name0 = run_name + '_' + str(int(final_acc*10000)).zfill(4)

Predict


In [ ]:
# Used to load a previously saved model and skip training.
# This Keras snippet is left over from the CNN notebooks; the XGBoost
# equivalent is the xgb.Booster() / load_model() pair used above.
# import os
# from keras.models import load_model
# cwd = os.getcwd()
# model = load_model(os.path.join(cwd, 'model', 'Dog_Breed_Identification_Train_20171024_155154.h5'))

In [ ]:
y_pred = bst1.predict(xg_test)  # per-class probabilities for the test set
print(y_pred.shape)

In [ ]:
# print(y_pred[:10])
# y_pred = np.clip(y_pred, 0.005, 0.995)
# print(y_pred[:10])
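
Clipping matters because the metric is multiclass log loss: a confidently wrong prediction costs -ln(p), which is unbounded as p approaches 0, while clipping into [0.005, 0.995] caps the per-example penalty at -ln(0.005) ≈ 5.3.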

In [ ]:
files = os.listdir(os.path.join(cwd, 'input', 'data_test', 'test'))
print(files[:10])

In [ ]:
cwd = os.getcwd()
df = pd.read_csv(os.path.join(cwd, 'input', 'labels.csv'))
print('labels amount: %d' % len(df))
df.head()

In [ ]:
n = len(df)
breed = set(df['breed'])
n_class = len(breed)
# Note: iterating a set gives no guaranteed order; this mapping is only valid
# if it matches the encoding used when the feature labels were built.
class_to_num = dict(zip(breed, range(n_class)))
num_to_class = dict(zip(range(n_class), breed))
print(breed)

In [ ]:
df2 = pd.read_csv(os.path.join(cwd, 'input', 'sample_submission.csv'))
n_test = len(df2)
print(df2.shape)
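
The rows of y_pred must line up with df2['id']. Assuming the test features were extracted in the order of the directory listing above (an assumption, not verified here), a hedged consistency check:

In [ ]:
# Hedged sanity check: prediction order vs submission order.
# test_ids = [os.path.splitext(f)[0] for f in files]
# assert list(df2['id']) == test_ids, 'prediction rows do not match submission ids'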

In [ ]:
# columns 1..120 of the sample submission are assumed to follow the same
# order as the numeric class encoding
for i in range(0, 120):
    df2.iloc[:, [i + 1]] = y_pred[:, i]
if not os.path.exists(output_path):
    os.mkdir(output_path)
pred_file = os.path.join(output_path, 'pred_' + run_name0 + '.csv')
df2.to_csv(pred_file, index=None)

In [ ]:
print(run_name0)
print('Done !')
